************************************************
***Title: mechanisms_trends.do
***Creators: Joelle Abramowitz, Shooshan Danagoulian, and Owen Fleming*
***Notes: This file produces the estimates of the effect of pollen exposure on Google search interest in terms related to allergy symptoms, depression, and exhaustion. To do this, we create a dataset including the trends information. This requires the production of a pollen dataset that does not drop pollen observations that are not in the NVDRS-pollen overlap. Therefore, 

*For questions, contact
*Owen Fleming
*hg3490@wayne.edu
************************************************


**********SETUP
cd ${path}/data/trends


***Start by constructing the pollen dataset
*Open pollen data
use ${path}/data/pollen/pollen.dta, clear

*Merge suicides (note that we no longer keep only the observations in the coverage overlap between pollen-NVDRS), then replace missing zeroes
merge 1:m county date using ${path}/data/nvdrs/nvdrs
drop if _merge==2
drop _merge

replace count=0 if missing(count)
foreach i of varlist count_* {
	replace `i' = 0 if missing(`i')
}

*Merge weather
merge 1:1 county date using ${path}/data/weather/weather.dta
drop if _merge==2
drop _merge

*Merge wind
merge 1:1 county date using ${path}/data/wind/wind.dta
drop if _merge==2
drop _merge

*Merge PM 2.5
merge 1:1 county date using ${path}/data/pm2pt5/pm2pt5.dta
drop if _merge==2
drop _merge

*Merge AQI
merge 1:1 county date using ${path}/data/aqi/aqi.dta
drop if _merge==2
drop _merge

*Oak masting
merge m:1 county_measurement year using ${path}/data/oak_masting/oak_masting.dta
drop if _merge==2
drop _merge

*Do some more cleaning
rename totalpollen pollen
rename totalspore spore

*There is some weirdness in the pollen measure - sometimes < 0
replace pollen = treepollen + grassweedpollen if pollen < 0
replace pollen = 0 if pollen < 0

replace treepollen = 0 if pollen==0
replace grassweedpollen = 0 if pollen==0

replace pollen = 0 if missing(pollen)

*Season
gen season = "fall" if month==9|month==10|month==11
replace season = "winter" if month==12|month==1|month==2
replace season = "spring" if month==3|month==4|month==5
replace season = "summer" if month==6|month==7|month==8	

*Location-season quartiles
foreach j in pollen treepollen grassweedpollen spore {
egen `j'_quartile_ls = xtile(`j'), n(4) by(county season)
foreach i in 1 2 3 4 {
	gen `j'_q`i'_ls = (`j'_quartile_ls == `i') if !missing(`j'_quartile_ls)
	bysort county (date): gen `j'_q`i'_ls_1 = `j'_q`i'_ls[_n-1]
	bysort county (date): gen `j'_q`i'_ls_2 = `j'_q`i'_ls[_n-2]
	gen `j'_qi`i'_ls = `j' * `j'_q`i'_ls
	bysort county (date): gen `j'_qi`i'_ls_1 = `j'_qi`i'_ls[_n-1]
	bysort county (date): gen `j'_qi`i'_ls_2 = `j'_qi`i'_ls[_n-2]
}
}

*Precipitation indicator
gen any_precip = (ppt > 0)

*Quadratics
foreach i in tmax tmin tmean ppt wind_speed {
	gen `i'_sq = `i'^2
}

*Some fixed effects
egen year_month = group(year month)
egen month_day = group(month day)

*Save
save pollen_for_trends, replace


***Next, construct the trends dataset
*Import and clean the trends data
use trends_raw, clear
rename date date_
gen date = dofc(date_)
format date %td
drop date_

gen fipscounty = 8041 if region == "Colorado Springs-Pueblo, CO"
replace fipscounty = 41039 if region == "Eugene, OR"
replace fipscounty = 13067 if region == "Atlanta, GA"
replace fipscounty = 24005 if region == "Baltimore, MD"
replace fipscounty = 37119 if region == "Charlotte, NC"
replace fipscounty = 17031 if region == "Chicago, IL"
replace fipscounty = 48121 if region == "Dallas-Ft. Worth, TX"
replace fipscounty = 39113 if region == "Dayton, OH"
replace fipscounty = 10003 if region == "Delaware"
replace fipscounty = 26099 if region=="Detroit, MI"
replace fipscounty = 42049 if region =="Erie, PA"
replace fipscounty = 42133 if region == "Harrisburg-Lancaster-Lebanon-York, PA"
replace fipscounty = 9009 if region == "Hartford & New Haven, CT"
replace fipscounty = 48201 if region =="Houston, TX"
replace fipscounty = 29095 if region =="Kansas city, MO"
replace fipscounty = 47093 if region == "Knoxville, TN"
replace fipscounty=55063 if region == "La Crosse-Eau Claire, WI"
replace fipscounty = 6001 if region=="Los Angeles, CA"
replace fipscounty = 21111 if region == "Louisville, KY"
replace fipscounty = 36061 if region == "New York, NY"
replace fipscounty = 40109 if region == "Oklahoma City, OK"
replace fipscounty = 31153 if region == "Omaha, NE"
replace fipscounty = 36055 if region == "Rochester, NY"
replace fipscounty = 49035 if region == "Salt Lake City, UT"
replace fipscounty = 48491 if region == "San Antonio, TX"
replace fipscounty = 6085 if region =="San Francisco-Oakland-San Jose, CA"
replace fipscounty = 53033 if region == "Seattle-Tacoma, WA"
replace fipscounty = 29510 if region == "St. Louis, MO"
replace fipscounty = 40143 if region == "Tulsa, OK"
replace fipscounty = 16083 if region == "Twin Falls, ID"
replace fipscounty = 27053 if region == "Minneapolis-St.Paul, MN"
replace fipscounty = 41051 if region == "Portland, OR"
replace fipscounty = 48309 if region == "Waco-Temple-Bryan, TX"
replace fipscounty = 45045 if region == "Greensboro-High Point Wingston Salem, NC"
drop if missing(fipscounty)

duplicates drop region date, force

rename fipscounty county

save trends, replace

*Restore pollen data, keep only the measurement localities, and merge trends data
use pollen_for_trends, clear
keep if county == county_measurement

*way to do this will be to drop if the merge failed. Then save that as temp. Then do another merge on the locations that failed. Then append together
merge 1:1 county date using trends
keep if _merge==3
drop _merge
save temp, replace

use trends, clear
drop county
gen county = 48041 if region =="Houston, TX"
replace county = 36009 if region =="Rochester, NY"
replace county = 24510 if region == "Baltimore, MD"
replace county = 31055 if region == "Omaha, NE"
drop if missing(county)

save trends, replace

*import again and merge the missing
use pollen_for_trends, clear
keep if county == county_measurement

merge 1:1 county date using trends
keep if _merge==3 
drop _merge

append using temp
erase temp.dta

rename allergy___congestion____runny_no t_allergy
rename feeling___feel___depressed___dep t_depressive
rename sleep___sleepy___tired___exhaust t_exhaustion


**********PRODUCE ESTIMATES
foreach i in t_allergy t_depressive t_exhaustion {

*Treatment: location-season quartiles
eststo s3_`i': ppmlhdfe `i' pollen_q2_ls pollen_q3_ls pollen_q4_ls $weather, absorb(county year_month month_day) cluster(county) tolerance(1e-06)
estadd ysumm
estadd scalar counties = e(N_clust)
estadd local Controls "Weather" 
estadd local FE "County, Year x Month, Month x Day"

}


**********REVERT DIRECTORY
cd ${path}


**********EXPORT
estout using results/mechanisms_trends.xls, cells(b(star label(Coef.) fmt(4)) se(par(`"="("'`")""') label(Std. Err.) fmt(4))) stats(ymean N counties Controls FE) starlevels(* 0.1 ** 0.05 *** 0.01) keep(pollen_q2_ls pollen_q3_ls pollen_q4_ls) legend label replace 
eststo clear






